{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from sklearn.preprocessing import MinMaxScaler, StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = [[-1,2],[-0.5,6],[0,10],[1,18]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据归一化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.  , 0.  ],\n",
       "       [0.25, 0.25],\n",
       "       [0.5 , 0.5 ],\n",
       "       [1.  , 1.  ]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ms = MinMaxScaler()\n",
    "ms = ms.fit(data)\n",
    "result = ms.transform(data)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1. ,  2. ],\n",
       "       [-0.5,  6. ],\n",
       "       [ 0. , 10. ],\n",
       "       [ 1. , 18. ]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ms.inverse_transform(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "StandardScaler()"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ss = StandardScaler()\n",
    "ss.fit(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.125,  9.   ])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ss.mean_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.546875, 35.      ])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ss.var_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_std = ss.transform(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.18321596, -1.18321596],\n",
       "       [-0.50709255, -0.50709255],\n",
       "       [ 0.16903085,  0.16903085],\n",
       "       [ 1.52127766,  1.52127766]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_std.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1. ,  2. ],\n",
       "       [-0.5,  6. ],\n",
       "       [ 0. , 10. ],\n",
       "       [ 1. , 18. ]])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ss.inverse_transform(x_std)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 缺失值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age     Sex Embarked Survived\n",
       "0  22.0    male        S       No\n",
       "1  38.0  female        C      Yes\n",
       "2  26.0  female        S      Yes\n",
       "3  35.0  female        S      Yes\n",
       "4  35.0    male        S       No"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(\"../data/Narrativedata.csv\", index_col=0)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Age         177\n",
       "Sex           0\n",
       "Embarked      2\n",
       "Survived      0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 891 entries, 0 to 890\n",
      "Data columns (total 4 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   Age       714 non-null    float64\n",
      " 1   Sex       891 non-null    object \n",
      " 2   Embarked  889 non-null    object \n",
      " 3   Survived  891 non-null    object \n",
      "dtypes: float64(1), object(3)\n",
      "memory usage: 34.8+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 中位数填充数据\n",
    "data.loc[:, \"Age\"] = data.loc[:,\"Age\"].fillna(data.loc[:, \"Age\"].median())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 891 entries, 0 to 890\n",
      "Data columns (total 4 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   Age       891 non-null    float64\n",
      " 1   Sex       891 non-null    object \n",
      " 2   Embarked  889 non-null    object \n",
      " 3   Survived  891 non-null    object \n",
      "dtypes: float64(1), object(3)\n",
      "memory usage: 34.8+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 889 entries, 0 to 890\n",
      "Data columns (total 4 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   Age       889 non-null    float64\n",
      " 1   Sex       889 non-null    object \n",
      " 2   Embarked  889 non-null    object \n",
      " 3   Survived  889 non-null    object \n",
      "dtypes: float64(1), object(3)\n",
      "memory usage: 34.7+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age     Sex Embarked Survived\n",
       "0  22.0    male        S       No\n",
       "1  38.0  female        C      Yes\n",
       "2  26.0  female        S      Yes\n",
       "3  35.0  female        S      Yes\n",
       "4  35.0    male        S       No"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 处理分类型特征：编码与哑变量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 处理标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = data.iloc[:, -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "le = LabelEncoder()\n",
    "le = le.fit(y)\n",
    "label = le.transform(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['No', 'Unknown', 'Yes'], dtype=object)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "le.classes_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 0, 1, 0, 2, 0, 2, 1, 2,\n",
       "       2, 2, 0, 1, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 1,\n",
       "       2, 0, 0, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 2,\n",
       "       0, 2, 0, 0, 0, 0, 0, 2, 1, 0, 1, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2,\n",
       "       0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 2, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 2, 0, 1, 0, 0,\n",
       "       2, 0, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 0, 0, 1, 0, 1, 1, 0, 0,\n",
       "       0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1,\n",
       "       0, 0, 0, 0, 0, 0, 1, 2, 0, 2, 2, 0, 0, 2, 0, 2, 1, 2, 2, 0, 0, 1,\n",
       "       0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2, 1, 2, 1, 0, 0, 2, 2, 0, 2, 0, 2,\n",
       "       0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0,\n",
       "       0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 0, 2, 0, 1, 0,\n",
       "       0, 1, 2, 2, 1, 0, 2, 2, 0, 2, 2, 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 2,\n",
       "       0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 2, 0, 2, 2, 2, 0,\n",
       "       2, 2, 2, 0, 0, 0, 2, 2, 1, 2, 2, 0, 1, 2, 2, 0, 2, 0, 1, 2, 2, 2,\n",
       "       0, 1, 0, 2, 0, 0, 2, 1, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0,\n",
       "       0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 0, 0, 0, 0,\n",
       "       2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 2, 2, 0, 2, 2, 0, 0,\n",
       "       0, 0, 2, 2, 1, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 1, 2, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2,\n",
       "       0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 1, 2, 0, 2, 2, 0, 2, 1, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 2, 2,\n",
       "       1, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 2, 1, 2,\n",
       "       2, 1, 2, 2, 0, 2, 2, 1, 0, 2, 0, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0,\n",
       "       0, 2, 0, 1, 2, 0, 2, 0, 2, 0, 2, 2, 0, 1, 2, 0, 0, 2, 2, 1, 2, 2,\n",
       "       0, 0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 2, 2, 1,\n",
       "       2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 0, 0, 2, 0, 2, 1, 0, 0, 2, 0, 0, 0,\n",
       "       0, 2, 0, 0, 2, 2, 0, 0, 0, 2, 0, 1, 2, 2, 1, 0, 0, 2, 0, 0, 1, 0,\n",
       "       0, 2, 0, 0, 2, 2, 0, 0, 0, 1, 2, 1, 0, 1, 0, 2, 0, 0, 2, 0, 0, 0,\n",
       "       0, 0, 2, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 1,\n",
       "       0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 2, 0,\n",
       "       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 2, 0, 0, 2, 2, 0, 0, 0,\n",
       "       0, 1, 2, 2, 2, 2, 0, 1, 0, 1, 1, 2, 1, 0, 0, 2, 0, 0, 0, 2, 0, 2,\n",
       "       2, 1, 1, 2, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 0,\n",
       "       0, 2, 2, 1, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 0, 0, 0,\n",
       "       1, 0, 0, 1, 1, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0,\n",
       "       0, 0, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 1, 0, 1, 0,\n",
       "       0, 0, 0, 0, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0,\n",
       "       2, 2, 0, 0, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 2, 0, 2, 2, 2, 2, 0,\n",
       "       0, 0, 2, 0, 1, 1, 1, 0, 0, 2, 0, 1, 0, 0, 2, 2, 0, 0, 0, 2, 2, 0,\n",
       "       0, 1, 0, 0, 0, 2, 0, 1, 0])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['No', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'Yes',\n",
       "       'Unknown', 'Yes', 'No', 'No', 'No', 'Unknown', 'No', 'Yes', 'No',\n",
       "       'Yes', 'Unknown', 'Yes', 'Yes', 'Yes', 'No', 'Unknown', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'No', 'Yes', 'No', 'No', 'No', 'Unknown', 'Yes', 'No', 'No', 'Yes',\n",
       "       'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No',\n",
       "       'Yes', 'No', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes',\n",
       "       'No', 'No', 'No', 'No', 'No', 'Yes', 'Unknown', 'No', 'Unknown',\n",
       "       'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'Unknown', 'Yes',\n",
       "       'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'Yes',\n",
       "       'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No',\n",
       "       'No', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',\n",
       "       'No', 'Unknown', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'No',\n",
       "       'No', 'Unknown', 'Unknown', 'Yes', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'No', 'Unknown', 'No', 'Unknown', 'Unknown', 'No', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes',\n",
       "       'Yes', 'No', 'No', 'No', 'Unknown', 'No', 'Yes', 'No', 'No', 'No',\n",
       "       'Unknown', 'No', 'No', 'No', 'No', 'No', 'No', 'Unknown', 'Yes',\n",
       "       'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'Unknown',\n",
       "       'Yes', 'Yes', 'No', 'No', 'Unknown', 'No', 'No', 'No', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Unknown', 'Yes',\n",
       "       'Unknown', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes',\n",
       "       'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes',\n",
       "       'Unknown', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'No',\n",
       "       'No', 'No', 'Unknown', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes',\n",
       "       'No', 'Yes', 'No', 'Unknown', 'No', 'No', 'Unknown', 'Yes', 'Yes',\n",
       "       'Unknown', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'No',\n",
       "       'Unknown', 'Unknown', 'No', 'No', 'No', 'Yes', 'No', 'No', 'Yes',\n",
       "       'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'No', 'No',\n",
       "       'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Unknown', 'Yes', 'No', 'Yes',\n",
       "       'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes',\n",
       "       'Yes', 'Unknown', 'Yes', 'Yes', 'No', 'Unknown', 'Yes', 'Yes',\n",
       "       'No', 'Yes', 'No', 'Unknown', 'Yes', 'Yes', 'Yes', 'No', 'Unknown',\n",
       "       'No', 'Yes', 'No', 'No', 'Yes', 'Unknown', 'No', 'Yes', 'Yes',\n",
       "       'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No',\n",
       "       'No', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes', 'No', 'No',\n",
       "       'No', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Unknown', 'Yes',\n",
       "       'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes',\n",
       "       'Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes',\n",
       "       'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'Yes',\n",
       "       'Unknown', 'No', 'No', 'No', 'Unknown', 'No', 'Yes', 'No', 'No',\n",
       "       'No', 'No', 'Yes', 'No', 'Yes', 'No', 'Unknown', 'Yes', 'No', 'No',\n",
       "       'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes',\n",
       "       'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes',\n",
       "       'No', 'No', 'No', 'Yes', 'Unknown', 'Yes', 'No', 'Yes', 'Yes',\n",
       "       'No', 'Yes', 'Unknown', 'No', 'No', 'No', 'No', 'No', 'No', 'No',\n",
       "       'Unknown', 'Unknown', 'No', 'Yes', 'Yes', 'No', 'No', 'Unknown',\n",
       "       'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'Unknown',\n",
       "       'Yes', 'No', 'No', 'Unknown', 'No', 'No', 'Unknown', 'No', 'No',\n",
       "       'No', 'Yes', 'Unknown', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes',\n",
       "       'Unknown', 'Yes', 'Yes', 'Unknown', 'Yes', 'Yes', 'No', 'Yes',\n",
       "       'Yes', 'Unknown', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No',\n",
       "       'No', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'Unknown', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',\n",
       "       'No', 'Unknown', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Unknown', 'Yes',\n",
       "       'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',\n",
       "       'No', 'No', 'No', 'No', 'Unknown', 'No', 'Unknown', 'No',\n",
       "       'Unknown', 'Unknown', 'Yes', 'Yes', 'Unknown', 'Yes', 'No', 'No',\n",
       "       'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes',\n",
       "       'No', 'Yes', 'Unknown', 'No', 'No', 'Yes', 'No', 'No', 'No', 'No',\n",
       "       'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'Unknown', 'Yes', 'Yes', 'Unknown', 'No', 'No', 'Yes', 'No', 'No',\n",
       "       'Unknown', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No',\n",
       "       'No', 'Unknown', 'Yes', 'Unknown', 'No', 'Unknown', 'No', 'Yes',\n",
       "       'No', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'Yes', 'Yes', 'Yes', 'Unknown', 'Yes', 'No', 'Yes', 'No', 'Yes',\n",
       "       'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'Unknown', 'No',\n",
       "       'No', 'No', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No',\n",
       "       'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes',\n",
       "       'No', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes',\n",
       "       'Yes', 'Unknown', 'Unknown', 'Unknown', 'Unknown', 'Yes', 'No',\n",
       "       'No', 'Yes', 'Yes', 'No', 'No', 'No', 'No', 'Unknown', 'Yes',\n",
       "       'Yes', 'Yes', 'Yes', 'No', 'Unknown', 'No', 'Unknown', 'Unknown',\n",
       "       'Yes', 'Unknown', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'Yes', 'Yes', 'Unknown', 'Unknown', 'Yes', 'No', 'Unknown', 'No',\n",
       "       'No', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'No',\n",
       "       'Yes', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Unknown',\n",
       "       'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'No', 'Yes',\n",
       "       'Yes', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'Unknown', 'No',\n",
       "       'No', 'Unknown', 'Unknown', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes',\n",
       "       'No', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'No', 'No',\n",
       "       'No', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'Yes',\n",
       "       'Yes', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'No', 'Unknown', 'No',\n",
       "       'Unknown', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No',\n",
       "       'Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No',\n",
       "       'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No',\n",
       "       'No', 'Unknown', 'No', 'No', 'No', 'Unknown', 'Unknown', 'No',\n",
       "       'Unknown', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No',\n",
       "       'No', 'Yes', 'No', 'Unknown', 'Unknown', 'Unknown', 'No', 'No',\n",
       "       'Yes', 'No', 'Unknown', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No',\n",
       "       'Yes', 'Yes', 'No', 'No', 'Unknown', 'No', 'No', 'No', 'Yes', 'No',\n",
       "       'Unknown', 'No'], dtype=object)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "le.inverse_transform(label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age     Sex Embarked  Survived\n",
       "0  22.0    male        S         0\n",
       "1  38.0  female        C         2\n",
       "2  26.0  female        S         2\n",
       "3  35.0  female        S         2\n",
       "4  35.0    male        S         0"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 上面步骤等价于\n",
    "data.iloc[:, -1] = LabelEncoder().fit_transform(data.iloc[:, -1])\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 给特征进行编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import OrdinalEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_ = data.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age     Sex Embarked  Survived\n",
       "0  22.0    male        S         0\n",
       "1  38.0  female        C         2\n",
       "2  26.0  female        S         2\n",
       "3  35.0  female        S         2\n",
       "4  35.0    male        S         0"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age  Sex  Embarked  Survived\n",
       "0  22.0  1.0       2.0         0\n",
       "1  38.0  0.0       0.0         2\n",
       "2  26.0  0.0       2.0         2\n",
       "3  35.0  0.0       2.0         2\n",
       "4  35.0  1.0       2.0         0"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_.iloc[:, 1:-1] = OrdinalEncoder().fit_transform(data_.iloc[:,1:-1])\n",
    "data_.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array(['female', 'male'], dtype=object), array(['C', 'Q', 'S'], dtype=object)]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "OrdinalEncoder().fit(data.iloc[:, 1:-1]).categories_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# OneHot编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age     Sex Embarked  Survived\n",
       "0  22.0    male        S         0\n",
       "1  38.0  female        C         2\n",
       "2  26.0  female        S         2\n",
       "3  35.0  female        S         2\n",
       "4  35.0    male        S         0"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import OneHotEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 1., 0., 0., 1.],\n",
       "       [1., 0., 1., 0., 0.],\n",
       "       [1., 0., 0., 0., 1.],\n",
       "       ...,\n",
       "       [1., 0., 0., 0., 1.],\n",
       "       [0., 1., 1., 0., 0.],\n",
       "       [0., 1., 0., 1., 0.]])"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = data.iloc[:, 1:-1]\n",
    "enc = OneHotEncoder(categories='auto').fit(x)\n",
    "result = enc.transform(x).toarray()\n",
    "result "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>884</th>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>885</th>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>886</th>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>887</th>\n",
       "      <td>male</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>888</th>\n",
       "      <td>male</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>889 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          0  1\n",
       "0      male  S\n",
       "1    female  C\n",
       "2    female  S\n",
       "3    female  S\n",
       "4      male  S\n",
       "..      ... ..\n",
       "884    male  S\n",
       "885  female  S\n",
       "886  female  S\n",
       "887    male  C\n",
       "888    male  Q\n",
       "\n",
       "[889 rows x 2 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(enc.inverse_transform(result))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['x0_female', 'x0_male', 'x1_C', 'x1_Q', 'x1_S'], dtype=object)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "enc.get_feature_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>female</td>\n",
       "      <td>C</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>female</td>\n",
       "      <td>S</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>male</td>\n",
       "      <td>S</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age     Sex Embarked  Survived    0    1    2    3    4\n",
       "0  22.0    male        S       0.0  0.0  1.0  0.0  0.0  1.0\n",
       "1  38.0  female        C       2.0  1.0  0.0  1.0  0.0  0.0\n",
       "2  26.0  female        S       2.0  1.0  0.0  0.0  0.0  1.0\n",
       "3  35.0  female        S       2.0  1.0  0.0  0.0  0.0  1.0\n",
       "4  35.0    male        S       0.0  0.0  1.0  0.0  0.0  1.0"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "newdata = pd.concat([data,pd.DataFrame(result)],axis=1)\n",
    "newdata.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Survived</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age  Survived    0    1    2    3    4\n",
       "0  22.0       0.0  0.0  1.0  0.0  0.0  1.0\n",
       "1  38.0       2.0  1.0  0.0  1.0  0.0  0.0\n",
       "2  26.0       2.0  1.0  0.0  0.0  0.0  1.0\n",
       "3  35.0       2.0  1.0  0.0  0.0  0.0  1.0\n",
       "4  35.0       0.0  0.0  1.0  0.0  0.0  1.0"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "newdata.drop([\"Sex\", \"Embarked\"], axis=1, inplace=True)\n",
    "newdata.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Female</th>\n",
       "      <th>Male</th>\n",
       "      <th>Embarked_C</th>\n",
       "      <th>Embarked_Q</th>\n",
       "      <th>Embarked_S</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age  Survived  Female  Male  Embarked_C  Embarked_Q  Embarked_S\n",
       "0  22.0       0.0     0.0   1.0         0.0         0.0         1.0\n",
       "1  38.0       2.0     1.0   0.0         1.0         0.0         0.0\n",
       "2  26.0       2.0     1.0   0.0         0.0         0.0         1.0\n",
       "3  35.0       2.0     1.0   0.0         0.0         0.0         1.0\n",
       "4  35.0       0.0     0.0   1.0         0.0         0.0         1.0"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "newdata.columns = [\"Age\",\"Survived\",\"Female\",\"Male\",\"Embarked_C\",\"Embarked_Q\",\"Embarked_S\"]\n",
    "newdata.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 二值化与分箱"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 二值化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "data2 = data.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.]])"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import Binarizer\n",
    "\n",
    "# Binarize the first column of data2: entries above 30 map to 1, all others to 0.\n",
    "# Selecting with [0] keeps the column 2-D, the shape sklearn transformers expect.\n",
    "x = data2.iloc[:, [0]].values\n",
    "transform = Binarizer(threshold=30).fit_transform(x)\n",
    "transform"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分箱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.]])"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import KBinsDiscretizer\n",
    "\n",
    "# Bin the age column (column 0) into 3 equal-width intervals, labelled 0, 1, 2.\n",
    "est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n",
    "x = est.fit_transform(data.iloc[:, [0]].values)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0.0, 1.0, 2.0}"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# x already holds the ordinal bin labels produced by est above, so collect the\n",
    "# distinct labels directly; calling est.fit_transform(x) here would re-fit the\n",
    "# discretizer on its own output and overwrite the bin edges learned from the ages.\n",
    "set(x.ravel())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 0., 0.],\n",
       "       [0., 1., 0.],\n",
       "       [1., 0., 0.],\n",
       "       ...,\n",
       "       [0., 1., 0.],\n",
       "       [1., 0., 0.],\n",
       "       [0., 1., 0.]])"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = data.iloc[:, 0].values.reshape(-1,1)\n",
    "est = KBinsDiscretizer(n_bins=3, encode='onehot', strategy='uniform')\n",
    "est.fit_transform(x).toarray()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征选择"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42000, 784)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "data = pd.read_csv(\"../data/digit_recognizor.csv\")\n",
    "x = data.iloc[:, 1:]\n",
    "y = data.iloc[:, 0]\n",
    "\n",
    "x.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 方差过滤"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "一个特征的方差很小代表这个特征基本上没什么差异，所以可能用处不大，优先消除方差为0的特征。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import VarianceThreshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42000, 708)"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = VarianceThreshold().fit_transform(x)\n",
    "x.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "也可以手动选择方差进行过滤，例如用方差的中位数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_fsvar = VarianceThreshold(np.median(x.var())).fit_transform(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 方差过滤对模型的影响"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.model_selection import cross_val_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### KNN未过滤特征前"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cross_val_score(KNeighborsClassifier(), x, y, cv=5).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### KNN过滤特征后"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cross_val_score(KNeighborsClassifier(), X_fsvar, y, cv=5).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 随机森林未过滤特征前"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9370238095238095"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cross_val_score(RandomForestClassifier(n_estimators=10, random_state=0), x, y, cv=5).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 随机森林过滤特征后"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9357142857142857"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cross_val_score(RandomForestClassifier(n_estimators=10, random_state=0), X_fsvar, y, cv=5).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 相关性过滤"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 卡方过滤\n",
    "\n",
    "卡方过滤是专门针对离散型标签(分类问题)的相关性过滤，不能计算复数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import SelectKBest, chi2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42000, 200)"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_fschi = SelectKBest(chi2, k=200).fit_transform(X_fsvar, y)\n",
    "x_fschi.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9289761904761905"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cross_val_score(RandomForestClassifier(n_estimators=10, random_state=0), x_fschi,y,cv=5).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxV9Z3/8deHsO9BwhpIQFFB9lwCam3r0oq1al2rqCwB1Hawq7aO2qmdTqfa/tqOM9pSFxBUpFbHuvShljq1tFoICQQEBEHWgEIQQthCSPL5/XEPek0DuYEk597c9/PxuA9yzvme5H0ON99PzvK9x9wdERFJPS3CDiAiIuFQARARSVEqACIiKUoFQEQkRakAiIikqJZhB6iP7t27e3Z2dtgxRESSSmFh4S53z6g5P6kKQHZ2NgUFBWHHEBFJKma2ubb5OgUkIpKiVABERFKUCoCISIpSARARSVEqACIiKUoFQEQkRakAiIikKBUAEZEEVXqwgv9bs4OfvbaG7aWHGvz7J9VAMBGR5srd2bDrAIWb9lC4eQ+FW/awfud+ANJaGDlZ6fTp2q5Bf6YKgIhICMqPVLGieC8Fm3ezdHO0099z8AgAXdq1IicrnStH9SUnK53hmV1o37rhu2sVABGRJrCzrJyCoKMv2LyHVdv2UlkdfSLjwIwOXDS4JzlZ6USy0xnYvSMtWlijZ1IBEBFpYFXVztoP91G4effHHX7xnug5/DYtWzAisyvTzhtIJCud0VnpdOvQOpScKgAiIidpX/kRlm0pjZ6737yHZVv2cKCiCoCMTm2IZKUz+ZxscrLSOatPF1q3TIz7b1QARETqwd0p3nOIgqN/3W/aw9od+3CHFgZn9OrMlaP7EsnqRk5WOpnp7TBr/NM5J0IFQETkOCoqq1m5fS9Lg86+cMseSvYdBqBjm5aM6t+V8UN7kZOVzsh+XenUtlXIieOnAiAiEmP3gYqPT+UUbt7N8uK9VFRWA9CvWzvOPfUUcrK7kdM/nTN6dSKtCS7WNhYVABFJWdXVzvsl+z++ULt08x427DoAQKs046w+XZg4LoucrHRystLp0bltyIkblgqAiKSMQxVVFG0tZemWPRRs2s3SLaXsPRS99z69fStysrpxbaQfkex0hvXtQttWaSEnblwqACLSbH2w99DHF2qXbtnD6u1lH997P6hHRy4Z2ovRWelEstIZ0L1Dwl6sbSwqACLSLFRWVbPmw32fOp2zLfj8nLatWjCyX1du/dxAIlndGNW/K13bh3PvfSJRARCRpLT30BGWbdnz8QXboq2lHAzuve/VuS052elM/cwAItnpDO7dmVZpiXHvfSJRARCRhOfubP7o4Kf+un9v5yf33g/p05lrczKjd+dkpdO3gT80rblSARCRhHO4soqV2/ZG77vfHD1/v2t/BQCd2rZkdP90Lh3em0hWOiP6daVDG3VlJ0J7TURCV7LvMEtjTue8U7yXiqrovffZp7Tns6dnfDyydlCPpvmgtFSgAiAiTcrdWbdzP0s27f64w9/80UEAWqe1YFhmF6acm83orHRG908no1ObkBM3XyoAItIkDldW8ccVHzDrrY2s3FYGQPeOrRndP50bx/YnJyudoX270KZl8773PpGoAIhIoyrZd5inF2/mqUVb2LX/MKf16Mi/X3EWnx2UQdYp7VPu3vtEogIgIo1i5ba9zH5rEy8v305FVTXnn5HBlHMHcN6g7ur0E4QKgIg0mKpqZ8HqD5n1903kb9pN+9ZpXJ/bj0nnZHNqRsew40kNKgAictL2HjrC75ZsYc7bm9lWeojM9Hbce+lgro30o0u75Pl45FQTVwEws/HAg0Aa8Ji7319jeTowCzgVKAfy3H2lmbUFFgJtgp/1nLv/sMa6dwA/BzLcfddJbo+INKH3S/bzxFubeH5pMQcrqhg7oBs/+PIQvjCkZ1J/THKqqLMAmFka
8DDwBaAYWGJmL7n76phmdwNF7n6lmZ0ZtL8QOAxc4O77zawV8Hcze9XdFwXfu1/wfbc06FaJSKNxdxau28Wsv2/kr++V0DqtBZeP7MOUc7M5q0+XsONJPcRzBJALrHf3DQBmNh+4AogtAEOAnwK4+xozyzaznu6+A9gftGkVvDxmvV8B3wNePKmtEJFGd7CikueXbuOJtzbyfskBMjq14TtfOJ0JY/vTvaPu1U9G8RSAvsDWmOliYGyNNsuBq4j+hZ8LZAGZwI7gCKIQOA142N0XA5jZ5cA2d19+vDsCzOwW4BaA/v37x7NNItKAtpUeYu7bm3gmfwtl5ZUM69uFX311BJcO65MwDzeXExNPAaitd/Ya0/cDD5pZEfAOsAyoBHD3KmCkmXUFXjCzocAG4B7gi3X9cHd/BHgEIBKJ1Py5ItII3J2CzXuY/dZGXlv5IWbG+LN6MeXcbHKy0nUbZzMRTwEoBvrFTGcC22MbuHsZMAXAou+MjcErtk2pmb0JjAdeBwYAR//6zwSWmlmuu394QlsiIiet5mjdLu1aMf2zA5l4drY+YbMZiqcALAEGmdkAYBtwPTAhtkHw1/1Bd68ApgEL3b3MzDKAI0Hn3w64CHjA3d8BesSsvwmI6C4gkXDUNlr3J1cO5cpRfWnfWneLN1d1/s+6e6WZzSD6V3saMMvdV5nZbcHymcBgYK6ZVRG9ODw1WL03MCe4DtACeNbdX2mE7RCRE6DRuqnN3JPntHokEvGCgoKwY4gktcqqahas3sHstz4ZrXtNTqZG6zZjZlbo7pGa83VsJ5Iiahute8+XBnPdGI3WTVUqACLNnEbryrGoAIg0QxqtK/FQARBpRmobrfvti6KjdfVkLalJBUCkGSjec5An/7FZo3WlXlQARJKURuvKyVIBEEkyGq0rDUUFQCRJaLSuNDS9a0QSXM3Rup8/I4M8jdaVBqACIJKAahutq2frSkNTARBJIBqtK01JBUAkAWi0roRBBUAkJBqtK2FTARBpYhqtK4lCBUCkiWi0riQaFQCRRqTRupLIVABEGoFG60oyUAEQaUC1jdb9j68M5arRGq0riUfvSJEGoNG6koxUAEROQnW189NX3+XRv23UaF1JOioAIieoorKa7z23nD8Ubeemcf258+IzNVpXkooKgMgJ2H+4kq89Vcjf1u3izovP4OufP1WneiTpqACI1NOu/YeZMnsJqz8o42fXDOe6SL+wI4mcEBUAkXrY8tFBJs5azIdl5Txycw4XDu4ZdiSRE6YCIBKnldv2Mnn2Eiqrq3l62jhystLDjiRyUlQAROLw1vpd3PpkIV3atWJ+3lhO69Ep7EgiJy2uDyAxs/FmttbM1pvZXbUsTzezF8xshZnlm9nQYH7bYHq5ma0ysx/FrPPjoH2Rmf3JzPo03GaJNJyXlm9n8ux8+nZtx/NfO0edvzQbdRYAM0sDHgYuAYYAN5jZkBrN7gaK3H04MBF4MJh/GLjA3UcAI4HxZjYuWPZzdx/u7iOBV4B/O+mtEWlgs9/ayDeeWcaofuk8e9vZ9OrSNuxIIg0mniOAXGC9u29w9wpgPnBFjTZDgDcA3H0NkG1mPT1qf9CmVfDyoF1ZzPodjs4XSQTuzgOvreFHL6/m4rN6Mndqru7xl2YnngLQF9gaM10czIu1HLgKwMxygSwgM5hOM7MiYCewwN0XH13JzH5iZluBGznGEYCZ3WJmBWZWUFJSEt9WiZyEI1XV3PH7FfzmzfeZMLY/v74xh7at0sKOJdLg4ikAtY1uqfnX+v1AetDR3w4sAyoB3L0qOM2TCeQevT4QLLvH3fsBTwMzavvh7v6Iu0fcPZKRkRFHXJETd7CiklvmFvD80mK+fdHp/OQrQ/VIRmm24rkLqBiIHemSCWyPbRCczpkCYNHhkBuDV2ybUjN7ExgPrKzxM+YBfwR+WI/sIg1q94EK8p5YworiUv7zymFMGNs/7EgijSqeI4AlwCAzG2BmrYHrgZdi
G5hZ12AZwDRgobuXmVmGmXUN2rQDLgLWBNODYr7F5Ufni4SheM9Brpn5Nqs/KOM3N+Wo85eUUOcRgLtXmtkM4HUgDZjl7qvM7LZg+UxgMDDXzKqA1cDUYPXewJzgTqIWwLPu/kqw7H4zOwOoBjYDtzXgdonE7d0Pypg0K5/yI1U8PW0sY7K7hR1JpEmYe/LcfBOJRLygoCDsGNKMLNrwEdPnFtChdUvm5OVyRi/d4y/Nj5kVunuk5nyNBJaU9drKD/jG/CL6d2vPnLxcPapRUo4KgKSkJxdt5t9eXMmofl15fNIY0ju0rnslkWZGBUBSirvzqwXv8d//t54Lz+zBQxNG06617vGX1KQCICmjsqqaH7y4kmfyt3JdJJP/vHIYLdPi+jgskWZJBUBSQvmRKmbMW8af393BjPNP47tfPF1P8JKUpwIgzV7pwQqmzSmgcMsefnT5WUw6JzvsSCIJQQVAmrXtpYeYNCufzR8d5KEbRnPp8N5hRxJJGCoA0myt27GPibPy2V9eyRN5Yzjn1O5hRxJJKCoA0iwVbNrN1DkFtG7Zgt/dejZD+nQOO5JIwlEBkGZnweodzJi3lD5d2zE3L5d+3dqHHUkkIakASLMyP38Ld7/wDsP6dmHW5DGc0rFN2JFEEpYKgDQL7s5D/7eeXyx4j8+dnsGvbxxNhzZ6e4scj35DJOlVVTv3vbSKJxdt5qrRfXng6uG00gAvkTqpAEhSKz9Sxbd/V8SrKz/k1s8N5K7xZ2qAl0icVAAkaZWVH2H6nAIWb9zNvZcOZtp5A8OOJJJUVAAkKe0oK2fSrHzeL9nPg9eP5IqRfcOOJJJ0VAAk6bxfsp+Jj+dTerCCWZPHcN6gjLAjiSQlFQBJKsu27CHviSWktTDm33I2wzK7hB1JJGmpAEjS+MvanXz9qaVkdGrD3Lxcsrt3CDuSSFJTAZCk8FxhMd9/fgWDe3di9uRcMjppgJfIyVIBkITm7sz86wYeeG0NnzmtOzNvzqGjBniJNAj9JknCqq52fvzH1cx+axOXjejDL64dQeuWGuAl0lBUACQhHa6s4o7fr+Dl5dvJO3cA9146mBYtNMBLpCGpAEjC2Vd+hNueKuSt9R9x1yVncutnB2p0r0gjUAGQhFKy7zCTZ+ez5sN9/OLaEVydkxl2JJFmK64TqmY23szWmtl6M7urluXpZvaCma0ws3wzGxrMbxtMLzezVWb2o5h1fm5ma4J1XjCzrg23WZKMNu06wNW/eZsNJQd4bFJEnb9II6uzAJhZGvAwcAkwBLjBzIbUaHY3UOTuw4GJwIPB/MPABe4+AhgJjDezccGyBcDQYJ33gH892Y2R5PVO8V6umfk2+8qPMG/6WM4/o0fYkUSavXiOAHKB9e6+wd0rgPnAFTXaDAHeAHD3NUC2mfX0qP1Bm1bBy4N2f3L3ymDZIkB/7qWov60r4fpH/kGblmk897VzGNU/PexIIikhngLQF9gaM10czIu1HLgKwMxygSyCDt3M0sysCNgJLHD3xbX8jDzg1fpFl+bgxaJt5D2xhH7d2vO/Xz+HUzM6hh1JJGXEUwBqu/3Ca0zfD6QHHf3twDKgEsDdq9x9JNGCkHv0+sDH39zsnqDt07X+cLNbzKzAzApKSkriiCvJ4rG/beCb84sY3T+dZ287m56d24YdSSSlxHMXUDHQL2Y6E9ge28Ddy4ApABa9X29j8IptU2pmbwLjgZVB20nAl4EL3b1mUTm63iPAIwCRSKTWNpJcqqudB15bw28XbuCSob341VdH0rZVWtixRFJOPEcAS4BBZjbAzFoD1wMvxTYws67BMoBpwEJ3LzOzjKN395hZO+AiYE0wPR74PnC5ux9smM2RRHekqpo7fr+c3y7cwM3jsnhowmh1/iIhqfMIwN0rzWwG8DqQBsxy91VmdluwfCYwGJhrZlXAamBqsHpvYE5wJ1EL4Fl3fyVY9hDQBlgQDPJZ5O63NdymSaI5cLiSrz29lIXvlXDH
F0/nX84/TQO8REJkxzjzkpAikYgXFBSEHUNOwEf7D5P3xBLe2baXn141jK+O6R92JJGUYWaF7h6pOV8jgaXRbd19kImz8tleeojf3hzhC0N6hh1JRFABkEa2ensZk2bnU1FZzdPTxhLJ7hZ2JBEJqABIo3n7/V3cOreQjm1bMu+2sxnUs1PYkUQkhgqANIo/rviAb/+uiOzu7ZmTl0vvLu3CjiQiNagASIOb8/Ym7nt5FTn903l80hi6tG8VdiQRqYUKgDQYd+cXf3qPh/6ynosG9+ShCaN0j79IAlMBkAZRWVXN3S+8w7MFxdyQ248fXzGUlml6fKNIIlMBkJN2qKKKGfOW8saanXzjwkF8+6JBGuAlkgRUAOSk7DlQwdQ5S1i2tZT/+MpQbhqXFXYkEYmTCoCcsG2lh5g0K58tuw/ymxtHM35o77AjiUg9qADICVn74T4mzcrnQEUlc/NyGTfwlLAjiUg9qQBIveVv3M20OUto1zqN3992Nmf26hx2JBE5ASoAUi+vr/qQ259ZRmZ6O+bm5ZKZ3j7sSCJyglQAJG7zFm/h3j+8w/DMrsyaPIZuHVrXvZKIJCwVAKmTu/PgG+v4rz+v4/wzMnj4xtG0b623jkiy02+xHFdVtfODF1cyb/EWrsnJ5KdXDaOVBniJNAsqAHJM5Ueq+Ob8Zby+agdf//yp3HnxGRrgJdKMqABIrfYeOsL0OQUs2bybH142hCnnDgg7kog0MBUA+Sc7ysqZ+Hg+G3bt57+vH8VlI/qEHUlEGoEKgHzKzn3l3PDIInaUlfPElFzOPa172JFEpJGoAMjHPtp/mBsfXcyHZeXMzcvV4xtFmjndziEAlB6s4ObHo5/r89ikiDp/kRSgAiCUlR9h0qx81u/czyMTI5xzqk77iKQCFYAUd+BwJVNmL2HV9jJ+feNoPnd6RtiRRKSJ6BpACjtUUcXUOUso2lrKQzeM4qIhPcOOJCJNSEcAKar8SBW3PFnA4o27+eV1I7hkmD7LXyTVxFUAzGy8ma01s/Vmdlcty9PN7AUzW2Fm+WY2NJjfNphebmarzOxHMetcG8yrNrNIw22S1KWispoZ85byt3W7eODq4Vwxsm/YkUQkBHUWADNLAx4GLgGGADeY2ZAaze4Gitx9ODAReDCYfxi4wN1HACOB8WY2Lli2ErgKWHjSWyFxq6yq5pvzl/Hnd3fyH18ZynWRfmFHEpGQxHMEkAusd/cN7l4BzAeuqNFmCPAGgLuvAbLNrKdH7Q/atApeHrR7193XNsRGSHyqqp3v/n45r678kB98eYie3yuS4uIpAH2BrTHTxcG8WMuJ/jWPmeUCWUBmMJ1mZkXATmCBuy+uT0Azu8XMCsysoKSkpD6rSozqaueu51fwYtF2vj/+TKZ+Rp/tI5Lq4ikAtX38o9eYvh9IDzr624FlQCWAu1e5+0iiBSH36PWBeLn7I+4ecfdIRoZuUTwR7tGPdP59YTHfumgQX/v8qWFHEpEEEM9toMVA7IniTGB7bAN3LwOmAFj084I3Bq/YNqVm9iYwnuj5f2kC7s6/v7Kapxdv4WufP5VvXjgo7EgikiDiOQJYAgwyswFm1hq4HngptoGZdQ2WAUwDFrp7mZllmFnXoE074CJgTcPFl+Nxdx54bS2z39pE3rkD+J4+z19EYtR5BODulWY2A3gdSANmufsqM7stWD4TGAzMNbMqYDUwNVi9NzAnuJOoBfCsu78CYGZXAv8DZAB/NLMid7+4YTcvtT34xjpm/vV9bhzbnx98ebA6fxH5FHOveTo/cUUiES8oKAg7RlL49Zvr+dlra7kmJ5OfXT2cFi3U+YukKjMrdPd/Gm+lkcDN0ON/38jPXlvL5SP68IA6fxE5BhWAZuapRZv58SuruWRoL3553QjS1PmLyDGoADQjzxZs5d4/rOTCM3vw4PWjaJmm/14ROTb1EM3Ei0Xb+P7zKzhvUHcevnE0rVvqv1ZEjk+9RDPw6jsf8J1nlzN2
QDceuTlC21ZpYUcSkSSgApDk/rx6B7c/s4yR/bry+KQxtGutzl9E4qMCkMQWvlfC159eyll9OjN7yhg6tNHzfUQkfioASeof73/E9LkFnNajI3PzxtK5bauwI4lIklEBSEIFm3Yzdc4Ssk5pz5NTc+nSXp2/iNSfCkCSKdpayuTZS+jVuS1PTRvLKR3bhB1JRJKUCkASWbV9LxMfX0x6h1Y8PX0sPTq1DTuSiCQxFYAk8d6Ofdz02GI6tmnJvGnj6N2lXdiRRCTJqQAkgQ0l+5nw6GJapbVg3vRx9OvWPuxIItIMqAAkuC0fHWTCo4sBZ970cWR37xB2JBFpJnTjeALbVnqIGx5dRHllFfNvGcdpPTqGHUlEmhEdASSoHWXlTHh0EWXlR3hq6ljO7NU57Egi0syoACSgkn2HmfDoInbtO8zcvFyG9u0SdiQRaYZ0CijB7D5QwU2PLWZ7aTlz8nIZ1T897Egi0kzpCCCB7D10hJsfX8ymjw7w+KQIuQO6hR1JRJoxFYAEsa/8CJNm5bNux35+e3MO55zWPexIItLMqQAkgIMVleQ9sYSV2/by0IRRfP6MHmFHEpEUoAIQsvIjVUybU0Dh5j08eP0ovnhWr7AjiUiK0EXgEB2urOLWJwv5x4aP+OV1I7h0eO+wI4lICtERQEiOVFUzY94y/vpeCfdfNYwrR2WGHUlEUowKQAgqq6r51vwiFqzewb9fcRZfHdM/7EgikoJUAJpYVbVz53Mr+OM7H3DvpYOZeHZ22JFEJEXFVQDMbLyZrTWz9WZ2Vy3L083sBTNbYWb5ZjY0mN82mF5uZqvM7Ecx63QzswVmti74t9mPeKqudu554R1eWLaNOy8+g2nnDQw7koiksDoLgJmlAQ8DlwBDgBvMbEiNZncDRe4+HJgIPBjMPwxc4O4jgJHAeDMbFyy7C3jD3QcBbwTTzZa7c9/Lq5i/ZCvfuOA0/uX808KOJCIpLp4jgFxgvbtvcPcKYD5wRY02Q4h24rj7GiDbzHp61P6gTavg5cH0FcCc4Os5wFdOfDMSm7vzkz++y9x/bObWzw7k2184PexIIiJxFYC+wNaY6eJgXqzlwFUAZpYLZAGZwXSamRUBO4EF7r44WKenu38AEPxb6+gnM7vFzArMrKCkpCS+rUowv/jTezz2941MPiebuy45EzMLO5KISFwFoLbeymtM3w+kBx397cAyoBLA3avcfSTRgpB79PpAvNz9EXePuHskIyOjPqsmhP95Yx0P/WU9N+T254eXDVHnLyIJI56BYMVAv5jpTGB7bAN3LwOmAFi0h9sYvGLblJrZm8B4YCWww8x6u/sHZtab6BFCs/Lbv77PLxa8x9WjM/nJV4aq8xeRhBLPEcASYJCZDTCz1sD1wEuxDcysa7AMYBqw0N3LzCzDzLoGbdoBFwFrgnYvAZOCrycBL57cpiSWJ97ayE9fXcNlI/rws2uG06KFOn8RSSx1HgG4e6WZzQBeB9KAWe6+ysxuC5bPBAYDc82sClgNTA1W7w3MCe4kagE86+6vBMvuB541s6nAFuDaBtyuUM1bvIX7Xl7NxWf15JfXjSBNnb+IJCBzr3k6P3FFIhEvKCgIO8ZxPV9YzB3PLef8M3ow86YcWrfUWDsRCZeZFbp7pOZ89U4N6OXl27nzueWce2p3fn3jaHX+IpLQ1EM1kNdWfsi3fldEJLsbj06M0LZVWtiRRESOSwWgAfxlzU5uf2YpwzO7MGvyGNq1VucvIolPBeAk/X3dLm59qpAze3XmiSm5dGyjRyyISHJQATgJizd8xLS5SxjYvQNz83Lp0q5V2JFEROKmAnCCCjfvIe+JJWSmt+epaWNJ79C67pVERBKICsAJWFFcyuRZ+WR0asO8aWPp3rFN2JFEROpNBaCeVm8v4+bH8+nSvhXzpo+jR+e2YUcSETkhKgD1sG7HPm5+fDHtW6fxzPRx9OnaLuxIIiInTAUgTht3HWDCY4tp0cKYN30c/bq1
DzuSiMhJUQGIw9bdB5nw6CKqq51508YyoHuHsCOJiJw03bReh+2lh5jw2CIOHanimenjGNSzU9iRREQahI4AjmNnWTk3PraY0gNHeDJvLIN7dw47kohIg9ERwDHs2n+YCY8tZkdZOU9OHcuwzC5hRxIRaVA6AqhF6cEKbnpsMcV7DjJr8hhystLDjiQi0uB0BFBDWfkRbn48nw27DjBr0hjGDTwl7EgiIo1CRwAx9h+uZPKsfNZ8WMbMm0bzmUHdw44kItJodAQQOFRRRd4TS1hevJeHJ4zmgjN7hh1JRKRR6QgAKD9SxfS5BRRs2s1/fXUk44f2CjuSiEijS/kjgIrKar72VCFvvb+L/3fNCC4b0SfsSCIiTSKljwCOVFVz+zNL+cvaEn7ylWFcnZMZdiQRkSaTsgWgqtr5zrPLeX3VDu67bAgTxvYPO5KISJNKyQJQXe1877kVvLx8O3d/6Uwmnzsg7EgiIk0u5QqAu3PPH1by/NJivvuF07nls6eGHUlEJBQpVQDcnR+9vJpn8rcw4/zTuP3CQWFHEhEJTcoUAHfn/lfX8MTbm5h+3gC++8XTw44kIhKquAqAmY03s7Vmtt7M7qplebqZvWBmK8ws38yGBvP7mdlfzOxdM1tlZt+MWWeEmf3DzN4xs5fNrFE/avNXf17HbxduYOLZWdz9pcGYWWP+OBGRhFdnATCzNOBh4BJgCHCDmQ2p0exuoMjdhwMTgQeD+ZXAd919MDAO+JeYdR8D7nL3YcALwJ0nuzHH8us31/Pfb6zjq5F+3HfZWer8RUSI7wggF1jv7hvcvQKYD1xRo80Q4A0Ad18DZJtZT3f/wN2XBvP3Ae8CfYN1zgAWBl8vAK4+qS05jqxuHbg2J5P/vGoYLVqo8xcRgfgKQF9ga8x0MZ904kctB64CMLNcIAv41KgqM8sGRgGLg1krgcuDr68F+tX2w83sFjMrMLOCkpKSOOL+s0uH9+bn144gTZ2/iMjH4ikAtfWaXmP6fiDdzIqA24FlRE//RL+BWUfgeeBb7l4WzM4jekqoEOgEVNT2w939EXePuHskIyMjjrgiIhKPeD4LqJhP/3WeCWyPbRB06lMALHqCfWPwwsxaEe38n3b3/41ZZw3wxaDN6WX8xxsAAAS+SURBVMClJ7wVIiJSb/EcASwBBpnZADNrDVwPvBTbwMy6BssApgEL3b0sKAaPA++6+y9rrNMj+LcFcC8w8+Q2RURE6qPOAuDulcAM4HWiF3GfdfdVZnabmd0WNBsMrDKzNUTvFjp6u+e5wM3ABWZWFLy+FCy7wczeA9YQPaKY3WBbJSIidTL3mqfzE1ckEvGCgoKwY4iIJBUzK3T3SM35KTMSWEREPk0FQEQkRakAiIikqKS6BmBmJcDmE1y9O7CrAeM0FOWqH+WqH+Wqn0TNBSeXLcvd/2kgVVIVgJNhZgW1XQQJm3LVj3LVj3LVT6LmgsbJplNAIiIpSgVARCRFpVIBeCTsAMegXPWjXPWjXPWTqLmgEbKlzDUAERH5tFQ6AhARkRgqACIiKapZFIBjPXvYzK4NpqvNLFJjnX8NnnG81swuToRcZpZtZodiPjivUT4h9Ti5fm5ma4JnO79gZl1j1glzf9Waq6n2Vx3ZfhzkKjKzP5lZn5h1wtxnteYK+z0Ws/wOM3Mz6x4zL7T9daxcYe8vM7vPzLbZP3+YZsPsL3dP+hfQGxgdfN0JeI/oYyoHE3305JtAJKb9EKJPMWsDDADeB9ISIFc2sDLE/fVFoGUw/wHggQTZX8fK1ST7q45snWPafAOYmSD77Fi5Qn2PBdP9iH668GageyLsr+PkCvt38j7gjlraN8j+ahZHAH6MZw+7+7vuvraWVa4A5rv7YXffCKwn+uzjsHM1iePk+pNHP/4bYBGfPNYz7P11rFxN5jjZymKadeCTp+WFvc+OlatJHCtXsPhXwPdqZAp1fx0nV5OoI1dtGmR/NYsCEMv++dnDtYnnOccNKs5c
AAPMbJmZ/dXMzmvMTHXkygNeDb5OpP0VmwuaeH/Vls3MfmJmW4EbgX8LmoW+z46RC0J8j5nZ5cA2d19eo1mo++s4uSD838kZwem8WWaWHsxrkP3VrAqA1f7s4Vqb1jKv0ap+PXJ9APR391HAd4B5Zta5qXOZ2T1En+n89NFZtaze5PurllxNur+Olc3d73H3fkGuGUeb1rJ6k+6zY+QK7T1G9P/uHj5djD5uWsu8JtlfdeQK+3fyN8CpwMggyy+ONq1l9Xrvr2ZTAOwYzx4+hjqfcxxGruBw7qPg60Ki5/VOb8pcZjYJ+DJwowcnG0mA/VVbrqbcX8fLFmMecHXwdej7rLZcIb/HTiV6vnq5mW0iuk+Wmlkvwt1fx8wV9u+ku+9w9yp3rwYe5ZPTPA2zvxryQkZYL6LVcC7wX8dY/iafvth6Fp++gLKBxrngVN9cGUdzAAOBbUC3psoFjAdWAxk15oe6v46Tq0n2Vx3ZBsV8fTvwXILss2PlCvU9VqPNJj652JoQv5O15Ar7d7J3zNffJnrev8H2V4P/ooTxAj5D9PBnBVAUvL4EXEm0Uh4GdgCvx6xzD9Fqvha4JBFyEf0rbVXwH7sUuKyJc60nel7x6LyZCbK/as3VVPurjmzPAyuD+S8TvQCbCPus1lxhv8dqtNlE0NGGvb+OlSvs/QU8CbwTzH+JTxeEk95f+igIEZEU1WyuAYiISP2oAIiIpCgVABGRFKUCICKSolQARERSlAqAiEiKUgEQEUlR/x+Lrs1gU2zy1wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "score = []\n",
    "for i in range(250,200,-10):\n",
    "    X_fschi = SelectKBest(chi2, k=i).fit_transform(X_fsvar, y)\n",
    "    once = cross_val_score(RandomForestClassifier(n_estimators=10,random_state=0),X_fschi,y,cv=5).mean()\n",
    "    score.append(once)\n",
    "plt.plot(range(250,200,-10),score)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1433554.26798015, 1429043.15373433, 1332663.17213405,\n",
       "       1625807.41495542, 1630206.90922916, 1630597.02883804,\n",
       "       1633456.72909664, 1610816.75571229, 1483382.49543886,\n",
       "       1256290.1574794 ,  951236.1617682 ,  693192.66191748,\n",
       "       1269945.07968831, 1221758.57688808, 1146535.17810241,\n",
       "       1080657.20185303, 1079065.30979135, 1092222.70610032,\n",
       "       1064908.45385716, 1023327.00231067,  974163.15420165,\n",
       "        918857.12860617,  861439.52030749,  657819.3908855 ,\n",
       "        599159.21961671,  576483.60795847,  559848.1818137 ,\n",
       "        536985.56062372,  561457.57734769,  594428.19185935,\n",
       "        592414.89830452,  587801.84097643,  672232.60135169,\n",
       "        790511.70530618,  866573.70991777,  891422.58050934,\n",
       "        641105.0929774 ,  632098.97938142,  725189.43548604,\n",
       "        853879.48154986,  863895.50862873,  709440.99808713,\n",
       "        615099.361498  ,  660082.35138802,  662040.13166049,\n",
       "        647432.43321103,  718070.06251003,  868119.93550552,\n",
       "        995128.78948214,  981295.46383871,  827660.8911231 ,\n",
       "       1008184.76195542, 1212658.65697336, 1279652.35847441,\n",
       "       1071947.51866571,  712579.55021262,  708178.91784269,\n",
       "        931871.22430817,  836155.03350401,  781584.17446604,\n",
       "        883252.58134165, 1015304.18853993, 1153480.28062008,\n",
       "       1235182.10720641,  980458.35707785, 1285689.5977369 ,\n",
       "       1412800.83270279, 1269424.03486304,  844394.53343881,\n",
       "        453037.70035635,  906205.5333485 , 1314337.38985735,\n",
       "        875502.64893107,  848574.25317153, 1088364.70535319,\n",
       "       1143964.61799576, 1231934.57606489, 1525266.11466634,\n",
       "        840830.08443577, 1249668.53527256, 1487299.06201808,\n",
       "       1440993.69232521, 1234157.54433962,  891091.32709079,\n",
       "        767327.74644144, 1767505.95851489, 1794612.36340341,\n",
       "        843649.64862696,  974860.15688277, 1282142.9789604 ,\n",
       "       1121437.99502364, 1158258.4373389 , 1803319.16430163,\n",
       "       1175840.18589306, 1569563.41213805, 1615071.70298461,\n",
       "       1453812.39367812, 1429193.59276003, 1469624.15320088,\n",
       "       1610038.8334007 , 2381990.83018419, 1684641.44646863,\n",
       "        781514.82471784, 1136274.26981954, 1258505.95202954,\n",
       "        900621.20914746, 1033026.39393914, 1617721.02614334,\n",
       "       1859303.87013649, 1706658.49975169, 1442798.15960511,\n",
       "       1497467.99791583, 1635633.52918659, 1789954.0440811 ,\n",
       "       2042036.97897042, 1243975.13567915,  847897.40531407,\n",
       "       1334491.44036763, 1146547.23628072,  721408.29456933,\n",
       "       1099901.34212844, 2008828.59327289, 2076128.28182046,\n",
       "       1775258.41727455, 1374882.05100453, 1223860.78474695,\n",
       "       1190499.55493678, 1469994.89713294, 1556218.16894472,\n",
       "        936604.6211571 , 1052799.96220046, 1487484.96092392,\n",
       "       1096221.91497984,  754081.7574313 , 1308952.47883141,\n",
       "       2269151.28497174, 2192372.5304657 , 2133745.04961112,\n",
       "       1807649.67676254, 1420349.92183107, 1222343.35267926,\n",
       "       1165504.99607627, 1500572.78318498, 1432663.59271325,\n",
       "        890862.82928644, 1274280.76943075, 1432072.36165076,\n",
       "        956997.59590817,  888742.14373383, 1508163.46238821,\n",
       "       2117988.12236915, 2179634.12755044, 1979740.23184782,\n",
       "       1717723.16781811, 1552275.10457858, 1461315.1411536 ,\n",
       "       1494620.94971976, 1739520.65681857, 1442815.46046628,\n",
       "       1014723.34171191, 1363794.15539944, 1122747.63831017,\n",
       "        739634.35332833, 1067567.31929299, 1667592.5097389 ,\n",
       "       1864455.97854722, 2170645.77302745, 1901786.79198259,\n",
       "       1733725.85760386, 1753508.37671084, 1712712.42549789,\n",
       "       1733493.582237  , 1770340.59820148, 1310901.04423274,\n",
       "       1008023.90988477, 1061378.72275934,  662457.8217003 ,\n",
       "        729648.40292041, 1370253.32454603, 1741943.51510473,\n",
       "       1593454.39731162, 2263953.8025206 , 2028172.33782053,\n",
       "       1992515.52928071, 2172520.74824953, 2035403.53821729,\n",
       "       1758624.07224114, 1514817.58748747, 1002116.10560135,\n",
       "        669379.60246184,  544978.32282227,  576722.03477821,\n",
       "       1098371.32650041, 1603057.05343338, 1612844.96289426,\n",
       "       2298916.31712188, 2140360.89085296, 2070900.53848134,\n",
       "       2159471.16950063, 1981772.43302263, 1521754.51234479,\n",
       "       1076334.48566226,  610262.09016783,  351852.98340462,\n",
       "        461487.7601437 ,  878540.35993713, 1337597.14727232,\n",
       "       1481891.62067955, 1342562.57238699, 2239501.99047448,\n",
       "       2199634.11766741, 1930471.52367226, 1666073.70450191,\n",
       "       1364054.30645472, 1000936.25390226,  656080.48075838,\n",
       "        389810.08772878,  340150.21186292,  602275.43941511,\n",
       "        959623.37641551, 1156974.48558905, 1135816.98677808,\n",
       "       1044294.97185606, 1970679.86657318, 2139321.52912066,\n",
       "       1894361.40873889, 1368494.5514217 ,  829396.07488768,\n",
       "        488455.25931088,  331978.08818759,  293365.6522867 ,\n",
       "        387474.16211622,  600489.80555106,  783767.50733716,\n",
       "        831013.99296771,  768407.80393857, 1382346.6477236 ,\n",
       "       1684701.13053512, 1732335.2436048 , 1492804.58962318,\n",
       "       1144230.23099215,  852589.97157847,  719215.03719448,\n",
       "        675891.53891187,  687917.5152402 ,  708292.70911948,\n",
       "        673810.32849758,  550803.45299243, 1098249.34334037,\n",
       "       1225055.2730661 , 1312843.88397644, 1325774.40817926,\n",
       "       1236093.76813092, 1036608.01098297,  829620.55626671,\n",
       "        654964.88596563,  520032.72156302,  942402.80700557,\n",
       "       1044698.95132913, 1009807.32615993,  844407.23356341])"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chivalue, pvalues_chi = chi2(X_fsvar,y)\n",
    "chivalue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0.])"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pvalues_chi"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### F检验\n",
    "\n",
    "检测特征与标签之间是否有线性关系"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import f_classif"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1200.99190762, 1209.29489877, 1110.4944286 , 1424.49864852,\n",
       "       1569.26556677, 1742.49910702, 1910.98023795, 1969.20520223,\n",
       "       1731.37475948, 1295.09668012,  839.15325001,  531.97951763,\n",
       "       1086.0472161 , 1177.72017709, 1253.79641973, 1344.06961068,\n",
       "       1507.33781169, 1616.50454434, 1512.25864876, 1289.65180587,\n",
       "       1051.26276412,  839.48869386,  680.07426932,  515.7498157 ,\n",
       "        532.86107778,  594.62512658,  664.18740444,  709.37133696,\n",
       "        798.11767931,  876.69849088,  852.76926441,  785.70173347,\n",
       "        802.88980095,  813.2041131 ,  760.85552527,  687.94148028,\n",
       "        476.23046034,  536.72332365,  740.12587382, 1041.38089649,\n",
       "       1168.8028973 ,  941.91083922,  795.72843454,  861.29818828,\n",
       "        868.19464432,  838.80173567,  886.26659655,  959.12740961,\n",
       "        934.56890789,  783.1988476 ,  657.20547321,  981.66873526,\n",
       "       1465.82267956, 1756.05831022, 1385.28086085,  798.73125604,\n",
       "        761.40508874, 1062.6919609 ,  979.38193965,  947.82602644,\n",
       "       1085.00522683, 1152.13801689, 1118.1595422 , 1021.13086631,\n",
       "        829.92259533, 1376.4852629 , 1811.62922878, 1601.33613631,\n",
       "        898.8719158 ,  417.37765921,  895.77244253, 1455.38592931,\n",
       "        956.2421521 ,  990.1748413 , 1359.47406197, 1279.27992017,\n",
       "       1166.80888121, 1291.41792351,  621.47324186, 1139.04489426,\n",
       "       1713.54508435, 1823.42451065, 1436.53069242,  884.19442779,\n",
       "        717.63373994, 2026.90370414, 2219.46450157,  943.55587655,\n",
       "       1217.29127813, 1677.03878308, 1193.63540136, 1039.56842784,\n",
       "       1570.18098323,  928.80156872, 1562.54171587, 1940.54801063,\n",
       "       1816.57346013, 1683.83193784, 1619.17496376, 1865.78706551,\n",
       "       3482.82350415, 2326.10253286,  990.67999393, 1632.46650414,\n",
       "       1652.51500198,  891.26746579,  883.96689508, 1394.82469151,\n",
       "       2008.19411716, 2107.3680475 , 1767.97892382, 1786.08753011,\n",
       "       1980.1986791 , 2509.14739387, 3366.13986444, 1959.90573326,\n",
       "       1299.36608875, 2218.28123025, 1470.25657381,  681.02610086,\n",
       "        937.54741741, 1881.71834116, 2351.30851824, 2175.48525458,\n",
       "       1624.49647062, 1399.44534221, 1440.98664744, 2229.25720739,\n",
       "       2764.00452882, 1633.74258116, 1870.29253742, 2628.79930504,\n",
       "       1367.31440177,  707.38857243, 1150.06936228, 2089.08213594,\n",
       "       2116.40726513, 2399.53090598, 2143.53519978, 1651.89817908,\n",
       "       1414.71662551, 1481.62100314, 2468.21266727, 2666.18025642,\n",
       "       1520.6400065 , 2223.14029953, 2271.07109628, 1111.06997494,\n",
       "        844.31183874, 1388.60413626, 1917.10207189, 2085.21461056,\n",
       "       2073.68356276, 1880.26929744, 1756.40165025, 1716.45478479,\n",
       "       1964.08537105, 2796.13761562, 2413.09378391, 1543.01310963,\n",
       "       2118.10377396, 1475.29541488,  783.59003763, 1040.65400476,\n",
       "       1582.46200024, 1617.32566033, 2067.20755476, 1893.35116837,\n",
       "       1795.96538455, 1922.58627318, 1951.69309645, 2115.44871238,\n",
       "       2479.27958039, 1809.12095649, 1330.8686207 , 1396.29767244,\n",
       "        741.9063402 ,  751.14036409, 1410.18529816, 1677.6595494 ,\n",
       "       1308.77910167, 2191.69964967, 2035.63638826, 2114.65218363,\n",
       "       2511.27142071, 2363.46743373, 2053.7687027 , 1865.84769096,\n",
       "       1202.94179711,  793.61414555,  633.71267282,  636.18282736,\n",
       "       1218.61245591, 1712.62901816, 1484.60290068, 2264.93587233,\n",
       "       2262.87269162, 2323.50890468, 2611.66920897, 2387.45723028,\n",
       "       1763.5696083 , 1256.32165954,  704.77285945,  406.94580935,\n",
       "        548.06969664, 1051.50016486, 1542.11172909, 1494.38472469,\n",
       "       1130.61174365, 2204.00694989, 2444.69535795, 2267.62871155,\n",
       "       2003.69161124, 1643.94961527, 1202.35520102,  804.18805494,\n",
       "        483.32932365,  420.99263006,  750.06949525, 1136.32227345,\n",
       "       1202.49476981,  990.75097727,  791.03016258, 1826.54973661,\n",
       "       2361.75564926, 2313.09139096, 1694.26613916, 1012.97938867,\n",
       "        608.4174945 ,  432.07115684,  383.54620406,  487.70312805,\n",
       "        698.78061024,  797.0763827 ,  714.70722998,  574.2849126 ,\n",
       "       1103.81003251, 1590.83695172, 1912.74984902, 1832.62220523,\n",
       "       1482.39046946, 1142.10827805,  968.65089356,  860.24853405,\n",
       "        780.75215696,  696.78170045,  567.41403081,  403.59649375,\n",
       "        940.06512113, 1205.58777055, 1485.37178744, 1623.12886955,\n",
       "       1488.04856361, 1119.91615126,  770.06544455,  530.6398126 ,\n",
       "        376.66549502,  727.78945347,  853.98680046,  819.19801306,\n",
       "        656.55547718])"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# f_classif returns two per-feature arrays: the ANOVA F-statistics and the\n",
    "# p-values that go with them. Only F is displayed here.\n",
    "F, p = f_classif(X=X_fsvar, y=y)\n",
    "F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0., 0., 0., 0.])"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "# F-test null hypothesis: the feature has NO significant linear relationship\n",
    "# with the label. A p-value of at most 0.05 rejects that null, so the feature\n",
    "# is worth keeping; k is the number of such features.\n",
    "# Counting p <= 0.05 directly (instead of subtracting the p > 0.05 count from\n",
    "# the total) keeps a NaN p-value from being tallied as significant.\n",
    "k = (p <= 0.05).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "261"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "k"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 互信息法\n",
    "检测特征与标签之间的任何关系（线性或非线性）；互信息估计值非负，0代表两个变量独立，值越大代表相关性越强"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import mutual_info_classif as MIC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.16464777, 0.17039638, 0.14877006, 0.15201399, 0.17948738,\n",
       "       0.20443589, 0.22621755, 0.23068017, 0.22182767, 0.18014634,\n",
       "       0.1393084 , 0.10496288, 0.12018338, 0.14107803, 0.1478493 ,\n",
       "       0.1601772 , 0.1771884 , 0.1881098 , 0.17632808, 0.15544731,\n",
       "       0.13785701, 0.11209763, 0.09967391, 0.08395678, 0.09526003,\n",
       "       0.10054395, 0.11409294, 0.10609254, 0.11594668, 0.11925878,\n",
       "       0.12188769, 0.11180155, 0.11427548, 0.11767441, 0.11043259,\n",
       "       0.09405475, 0.07763607, 0.09114352, 0.11490271, 0.1430405 ,\n",
       "       0.1457611 , 0.11983108, 0.11038255, 0.12261127, 0.11325983,\n",
       "       0.11450383, 0.12722275, 0.14128231, 0.12314701, 0.10336112,\n",
       "       0.0968835 , 0.1358363 , 0.18647531, 0.20455954, 0.15784041,\n",
       "       0.10762345, 0.10498656, 0.1268229 , 0.11578533, 0.13237192,\n",
       "       0.14428386, 0.15489142, 0.13831257, 0.12008827, 0.12536496,\n",
       "       0.17558366, 0.22131725, 0.19107452, 0.12015322, 0.07451139,\n",
       "       0.11236347, 0.14385201, 0.1199244 , 0.13119526, 0.16252834,\n",
       "       0.16102219, 0.13773245, 0.12893312, 0.09367914, 0.15578828,\n",
       "       0.21281334, 0.21297141, 0.17365578, 0.10886296, 0.10578485,\n",
       "       0.17805379, 0.19092909, 0.12352591, 0.14253148, 0.18139258,\n",
       "       0.15067472, 0.13563751, 0.13836397, 0.12685206, 0.19157972,\n",
       "       0.22606565, 0.21969145, 0.19237563, 0.16507377, 0.19657206,\n",
       "       0.25951495, 0.22240677, 0.14306216, 0.18192042, 0.1886465 ,\n",
       "       0.13693483, 0.12557612, 0.14823422, 0.21061856, 0.23113334,\n",
       "       0.21297094, 0.20391133, 0.20025506, 0.24722128, 0.28517596,\n",
       "       0.21486523, 0.1782978 , 0.21927579, 0.17412006, 0.12111387,\n",
       "       0.11926228, 0.17809719, 0.22399518, 0.21557707, 0.1946871 ,\n",
       "       0.17094566, 0.17666078, 0.24626193, 0.2847926 , 0.2025767 ,\n",
       "       0.21490259, 0.25207691, 0.18680718, 0.1217684 , 0.13235238,\n",
       "       0.16034618, 0.1909878 , 0.23017031, 0.22064994, 0.18806995,\n",
       "       0.17199009, 0.19732046, 0.25459336, 0.2758476 , 0.20443692,\n",
       "       0.23198324, 0.23884376, 0.16057013, 0.13494314, 0.15466125,\n",
       "       0.16173597, 0.19190946, 0.20687943, 0.1976726 , 0.1825281 ,\n",
       "       0.18193948, 0.22096719, 0.27002565, 0.24655944, 0.19062872,\n",
       "       0.20782937, 0.18399638, 0.13610532, 0.14345602, 0.16750159,\n",
       "       0.15457183, 0.17875981, 0.19420543, 0.18930793, 0.19858628,\n",
       "       0.20108583, 0.21641634, 0.24565639, 0.19579032, 0.14688549,\n",
       "       0.15754618, 0.12098249, 0.13523771, 0.16393092, 0.1712111 ,\n",
       "       0.14304076, 0.18410017, 0.18909688, 0.19314937, 0.21368502,\n",
       "       0.21542386, 0.19614868, 0.19591861, 0.14538657, 0.0983756 ,\n",
       "       0.09595536, 0.10721368, 0.15930851, 0.18904815, 0.17408475,\n",
       "       0.20970415, 0.20982078, 0.21971066, 0.21880495, 0.20823283,\n",
       "       0.17171886, 0.1459481 , 0.08913386, 0.06582762, 0.08706288,\n",
       "       0.13953912, 0.18865972, 0.19179636, 0.15360153, 0.21918778,\n",
       "       0.23021894, 0.23346791, 0.21795017, 0.17425476, 0.14176469,\n",
       "       0.10647667, 0.07712883, 0.06924502, 0.10354999, 0.14987781,\n",
       "       0.1602145 , 0.14334927, 0.1194241 , 0.20704444, 0.23042018,\n",
       "       0.23879498, 0.19224316, 0.13635516, 0.10031441, 0.07076891,\n",
       "       0.07015625, 0.0788179 , 0.10170835, 0.12434944, 0.1159271 ,\n",
       "       0.09311912, 0.1483055 , 0.18272718, 0.19831068, 0.18946631,\n",
       "       0.16166097, 0.12807711, 0.12221226, 0.11270824, 0.11006938,\n",
       "       0.09042638, 0.08759487, 0.0742336 , 0.13740928, 0.16060171,\n",
       "       0.17995739, 0.18361763, 0.18016367, 0.15954738, 0.12073055,\n",
       "       0.09237908, 0.07162732, 0.11422429, 0.12401193, 0.12155985,\n",
       "       0.10710489])"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# mutual_info_classif is a randomized estimator, so the seed is pinned to make\n",
    "# the scores repeatable from one run of the notebook to the next.\n",
    "result = MIC(X_fsvar, y, random_state=0)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# k = number of features whose estimated mutual information with the label is\n",
    "# strictly positive, i.e. features not judged independent of it.\n",
    "# A vectorized NumPy count replaces the builtin sum(), which walks the array\n",
    "# one element at a time in Python.\n",
    "k = (result > 0).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 嵌入法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42000, 51)"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_selection import SelectFromModel\n",
    "\n",
    "# Small seeded forest whose feature importances drive the selection.\n",
    "rfc = RandomForestClassifier(random_state=0, n_estimators=10)\n",
    "# Keep only the columns whose importance reaches the 0.005 threshold.\n",
    "x_embedded = SelectFromModel(estimator=rfc, threshold=0.005).fit_transform(X=x, y=y)\n",
    "x_embedded.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxcdb3/8dcnmSzN2mbpnjZpk7ak0gXSDSgUUGhVFgEhxR+IyK1FC16vekG9F3/3eu9V4LoglAtVUZSl9iIoCgoqu6WlKd0o3dJ0C92SdE3a7N/7xww1TZNmmszkzEzez8djHpk55zuTd6PnzZmZc77HnHOIiEj0i/M6gIiIhIYKXUQkRqjQRURihApdRCRGqNBFRGKEz6tfnJOT4/Lz87369SIiUWnlypXVzrncjtZ5Vuj5+fmUlZV59etFRKKSme3obJ0+chERiREqdBGRGKFCFxGJESp0EZEYoUIXEYkRKnQRkRihQhcRiRGeHYfeE62tjsaWVhqaWmlobqGh2f+z/sPHTa0nljU0t1LfFBgTWN/c6khL8pHZL+HErX9K4on7yQlxmJnX/0wRkTMSdYX+h7W7WfDUqrD+jsT4ODL6JZDZ7+TS//CW0S+BtCQfKUk+UhPjSUn0BR7Hk5r495/xcfqPgoj0nqgr9DGD0vnypUUkJcSR5IsnyRdHcoL/Z5IvjqS2933xJCW0W++LJz7OqK1v5vDxptPejgR+Vtc2srWqzr+svolgrwmSnBB3UsGnJvlISYw/8e6gf0oi/VMS6N8vwf/zxGP/z+SE+PD+MUUkpkRloY/5WHqPXyczJYHMlIQzfl5rq+NoQzPHGpupa2jhWGMztQ3NHGtooa6xmWONLdQ1/H1dXWBcXYN/XW1DM3sP13P4eBOHjjXR2NLa6e9KTog7Ue5tiz4rNZHCgWmMG5zB6IGpJPlU/CIShYXutbg4O/HRS0855zje1MKhY02BWyOHAkV/6Hjj35cF1ldU13LoWBMHjzXS1OJ/m+CLM0bnpjF2cDrjhqRz1uAMxg1JZ3BGsr4HEOljVOgeMjNSEn2kJPoY2r9f0M9rbmlle00dG/YcZePeI2zae5SVOw7y/JrdJ8ZkJPsYNySDswanM25IBuMGpzNmUDqpSfqfXCRWaeuOQr74OAoHplM4MJ0rJg49sfzw8SY27zvKxj1H2Lj3KBv3HuWZlZXUNbacGDMyO4Vxg9OZkp/FtIJsiodm6MtbkRihQo8hmf0SmJKfxZT8rBPLWlsdHxw6zoY9/j35jXuP8t7uw7y0fh8A6Uk+SvIHMG1UNtNHZfORoRn44nV6gkg0UqHHuLg4Iy8rhbysFC4bP/jE8r2H61m+rYbl2w6wvKKGVzdVAZCaGM+5+VlMK8hi+qgszh7Wn0SfCl4kGpgL4hg8M5sNPADEAz91zn2v3foBwGPAaKAeuNU5997pXrOkpMTpAheRY//Ret7ZdoDlFQdYvq2GzftqAeiXEM85I/szvSCbaaOymZiXqaNqRDxkZiudcyUdruuq0M0sHtgMfAyoBFYAc51z77cZcz9Q65z7NzMbByx0zl16utdVoUe2mtoGf8FvO8Cyiho27j0KQJIvjol5/RmZlcLgzGQGZSQzOCP5xP3s1ETi9Jm8SNicrtCD+chlKlDunKsIvNhi4Crg/TZjioHvAjjnNppZvpkNcs7t61l08Up2WhJzzh7CnLOHAHDoWOOJgn9350He2FJF1dEGWtvtDyTEGwPTkxmUkdRh4Q/O8P/sl6i9fJFQC6bQhwG72jyuBKa1G7MGuAZ4y8ymAiOB4cBJhW5m84B5ACNGjOhmZPFC/5RELhs/+KTP4ZtbWqmubWTvkXr2Hq5n35F69h6pZ99h/8+Ne4/y+qaqk46y+VBqYjzZaUlkpyWSnZpEdmqi/35aEjlpiWSl+pfnpCUyIDWRBH1RK9KlYAq9o/fP7T+n+R7w
gJmtBtYBq4DmU57k3CJgEfg/cjmzqBJpfPFxDM70732T1/m4o/VN/rI/3OAv/CP11NQ2UlPXwIG6Rj44dJy1lYc4UNdIc/td/oDMfglkpyWSk5pEbkYSn5k6gvMKc8L0LxOJTsEUeiUnb67Dgd1tBzjnjgCfAzD/6YnbAjcR0pMTSE9OoHDg6adscM5x5Hgz1XUN/sKvbaCmrvFE+fvvN7C84gAvrN3DpeMG8o2Pj+vydUX6imAKfQVQZGYFwAdAKXBj2wFm1h845pxrBG4D3giUvEjQzOzEHDujczsfV9/Uwi+WbmfhK+Vc/qM3mTs1j3/86Bhy0pJ6L6xIBOryg0nnXDOwAHgJ2AAscc6tN7P5ZjY/MOwsYL2ZbQTmAF8OV2CR5IR45l80mtf/+WL+37QRLH5nF7Puf42Fr5ZT33Tq5/UifUVQx6GHgw5blFDZWlXLvX/cyMvv72NIZjJfv3wsV08apsMnJSad7rBFHTogUW90bhqLbi5h8bzp5KYn8U9L1nDlwrdYurXa62givUqFLjFj+qhsfvvF83mgdBIH65q48SfLue3xFZTvP+p1NJFeoUKXmBIXZ1w1aRh//epF3DV7HMsrDnD5j97kX367juraBq/jiYSVCl1iUnJCPLfPGs1rX5/FZ6aN4Gl9cSp9gL4UlT5ha1Ut3/vjRv4c+OL0iolDuaAwh6kFWbp2q0SVHk3OFS4qdPHCsooaHnxlC+9sO0BTiyPRF0fJyAFcUJTDzMJcxg/N0NExEtFU6CLtHGtsZvm2A/xtSzVvlVefmE1yQEoC543O4YKiHC4ozCEvK8XjpCIn6+lsiyIxJyXRx8VjB3Lx2IGAfz74peU1vLmlmrfKq3hh3R4A8rNTOL8wh5lFOcwYnROSi4OLhIv20EXacc6xtarWX+5bqllWUUNdYwtxBhOG9+fCohxuvaCA/imJXkeVPkgfuYj0QFNLK6t3HeLNLdX8rbyaVTsPMnZwBk/eNo2sVJW69C4VukgIvbG5in/4ZRkFOak8cds0TQomvUqn/ouE0IVjcvnZZ6ewvaaOuYuWUXVUJyxJZFChi3TDBUU5PHbLFCoPHqd00dvsP1LvdSQRFbpId503OodffG4Kew7XU7poGXsPq9TFWyp0kR6YNiqbX946lX1H6ild9DZ7Dh/3OpL0YSp0kR4qyc/il5+fRk1tIzc8uowPDqnUxRsqdJEQOHfkAH512zQOHmvkhkffZteBY15Hkj5IhS4SIpPy+vPkbdM4cryJ0kXL2FmjUpfeFVShm9lsM9tkZuVmdncH6zPN7PdmtsbM1pvZ50IfVSTyTRjen6f+YTp1jc3csOhttlfXeR1J+pAuC93M4oGF+C/+XAzMNbPidsO+BLzvnJsIzAK+b2Y6hU76pI8My+Sp26ZT39RC6aJlVFTVeh1J+ohg9tCnAuXOuQrnXCOwGLiq3RgHpJuZAWnAAaA5pElFokjx0AyenjedppZWShcto3y/Sl3CL5hCHwbsavO4MrCsrYeAs4DdwDrgy8651vYvZGbzzKzMzMqqqqq6GVkkOowbnMHiedNpdVC6aBlb9unaphJewRR6R7P9t58A5nJgNTAUmAQ8ZGYZpzzJuUXOuRLnXElubu4ZhxWJNkWD0lk8bzpx5i/1TXtV6hI+wRR6JZDX5vFw/HvibX0OeNb5lQPbgHGhiSgS3QoHprF43nR88cbcnyzj/d1HvI4kMSqYQl8BFJlZQeCLzlLg+XZjdgKXApjZIGAsUBHKoCLRbFRuGr+eN4MkXxw3/nQZm/Xxi4RBl4XunGsGFgAvARuAJc659WY238zmB4Z9BzjPzNYBfwXucs5Vhyu0SDTKz0nl1/Nm4IuL40tPvsvxxhavI0mM0XzoIr3srS3V3PTYckqn5PHdayZ4HUeijOZDF4kgFxTlMP+i0Tz9zi5+v6b911Ei3adCF/HAP31sDJNH9Oebz67TvC8SMip0EQ8kxMfx49LJYHDH06toajnltA2R
M6ZCF/FIXlYK9147gdW7DvH9lzd7HUdigApdxEMfP3sIN04bwSOvb+WNzTp7WnpGhS7isXs+WczYQen805LV7D+qy9hJ96nQRTyWnBDPgzdOprahma8uWUNrqzeHEkv0U6GLRIAxg9L59hXjeXNLNY++oZOspXtU6CIRonRKHp+YMITvv7yJd3ce9DqORCEVukiEMDO+e83ZDM5M5s6nV3H4eJPXkSTKqNBFIkhGcgI/njuZvYfr+eaz6/Bqag6JTip0kQhzzogBfO3ysbywbg+LV+zq+gkiASp0kQg0b+YoZhbl8P+fX6+pdiVoKnSRCBQXZ/zg+kmkJyew4ClNtSvBUaGLRKjc9CR+eMNENu+r5TsvvO91HIkCKnSRCDazKJf5F43mqeU7eWHtHq/jSIRToYtEuK9eNoZJef25+9m1mmpXTkuFLhLhEuLjeHDuZADuXKypdqVzKnSRKJCXlcL3rpnAqp2H+MGfNdWudCyoQjez2Wa2yczKzezuDtZ/3cxWB27vmVmLmWWFPq5I3/WJCUOYO9U/1e5bW3QNdjlVl4VuZvHAQmAOUAzMNbPitmOcc/c75yY55yYB3wBed84dCEdgkb7snk8WU5ibxleWrKamtsHrOBJhgtlDnwqUO+cqnHONwGLgqtOMnws8HYpwInKyfonx/HjuZA4fb+Lrz6zV1ABykmAKfRjQ9vzjysCyU5hZCjAb+E0n6+eZWZmZlVVV6eosIt1x1pAMvjlnHK9s3M8v397hdRyJIMEUunWwrLPdgiuAv3X2cYtzbpFzrsQ5V5KbmxtsRhFp57Pn5XPpuIH854sb2LDniNdxJEIEU+iVQF6bx8OB3Z2MLUUft4iEnZlx33UTyOyXwJ1Pr9LUAAIEV+grgCIzKzCzRPyl/Xz7QWaWCVwE/C60EUWkI9lpSfzg+ols2V/Lf2hqACGIQnfONQMLgJeADcAS59x6M5tvZvPbDP0U8LJzri48UUWkvZlFuXzhwlE8uXwnL63f63Uc8Zh59S15SUmJKysr8+R3i8SSxuZWrntkKTsPHOOPX57JkMx+XkeSMDKzlc65ko7W6UxRkSiX6IvjgdLJNDa38pVfr6alVYcy9lUqdJEYUJCTyr9dOZ5lFQd45PWtXscRj6jQRWLEdecO54qJQ/nBnzezaudBr+OIB1ToIjHCzPjPT32EIZnJ3Ll4FUfrm7yOJL1MhS4SQzKSE3igdBK7D9Vzz+/Wex1HepkKXSTGnDsyiy9fWsRzqz7guVWVXseRXqRCF4lBX7q4kKkFWfzLc++xo0anhvQVKnSRGBQfZ/zohknExxl3Ll6tqxz1ESp0kRg1tH8/7r12Amt2HeKHuspRn6BCF4lhc84ewtypefzP61tZWq6rHMU6FbpIjPvXTxYzKieVryxZzYG6Rq/jSBip0EViXEqijx/PnczBuibu+o2uchTLVOgifcD4oZncNWccf35/H08s3+l1HAkTFbpIH3Hr+fnMGpvLf/zhfcr3H/U6joSBCl2kjzAz7r9uIgnxcTz0SrnXcSQMVOgifUhuehKfLhnOC+v2sP9ovddxJMRU6CJ9zGdn5NPc6nhKn6XHHBW6SB+Tn5PKxWMH8sSynTQ26wzSWBJUoZvZbDPbZGblZnZ3J2NmmdlqM1tvZq+HNqaIhNIt5+VTXdvAi+v2eB1FQqjLQjezeGAhMAcoBuaaWXG7Mf2Bh4ErnXPjgU+HIauIhMjMohxG56by86XbvY4iIRTMHvpUoNw5V+GcawQWA1e1G3Mj8KxzbieAc25/aGOKSCiZGZ89L581uw7p6kYxJJhCHwbsavO4MrCsrTHAADN7zcxWmtnNHb2Qmc0zszIzK6uqqupeYhEJiWvOGU56ko9faC89ZgRT6NbBsvbnDvuAc4FPAJcD/2pmY055knOLnHMlzrmS3NzcMw4rIqGTluTj0yV5vLB2D/uO6BDGWBBMoVcCeW0eDwd2dzDmT865OudcNfAGMDE0EUUkXG6e
MZIW53hShzDGhGAKfQVQZGYFZpYIlALPtxvzO2CmmfnMLAWYBmwIbVQRCbX8nFQuGTuQp5bvoKG5xes40kNdFrpzrhlYALyEv6SXOOfWm9l8M5sfGLMB+BOwFngH+Klz7r3wxRaRULnl/Hyqaxt5Ya0OYYx25tVUmiUlJa6srMyT3y0if+ec42M/fIN+CfE8v+B8zDr62kwihZmtdM6VdLROZ4qK9HEfHsK47oPDvLvzkNdxpAdU6CLCNZOHkZ7s43EdwhjVVOgiQmqSjxtK8nhxnQ5hjGYqdBEB4OYZ+f5DGJft8DqKdJMKXUQAGJGdwqXjBvLk8p06hDFKqdBF5IRbziugpq6RP6zRIYzRSIUuIiecX5hN4cA0frF0O14d0izdp0IXkRPMjFtOHMKoWRijjQpdRE5yzTn+Qxh//rftXkeRM6RCF5GTpCT6KJ2Sxx/f28uew8e9jiNnQIUuIqe4eUY+rc7x5DLNwhhNVOgicoq8rBQ+etYgnnpnJ/VNOoQxWqjQRaRDnzsvnwN1jfx+TfvLH0ikUqGLSIdmjM5mzCAdwhhNVOgi0iH/IYwFrN99hJU7dAhjNFChi0inrp48lMx+CfxcszBGBRW6iHQqJdHHDVPy+JMOYYwKKnQROa2bpo/EOccTmoUx4qnQReS0ThzCuFyHMEa6oArdzGab2SYzKzezuztYP8vMDpvZ6sDtntBHFRGv3HJ+PgePNfG8DmGMaF0WupnFAwuBOUAxMNfMijsY+qZzblLg9u8hzikiHpoxKpuxg9L5xd90CGMkC2YPfSpQ7pyrcM41AouBq8IbS0QiiZlxy/n5vL/nCCu26xDGSBVMoQ8DdrV5XBlY1t4MM1tjZn80s/EdvZCZzTOzMjMrq6qq6kZcEfHK1ZOGkdkvgV8s3eZ1FOlEMIVuHSxr/57rXWCkc24i8CDw245eyDm3yDlX4pwryc3NPbOkIuKpfonxlE7N46X1+/jgkA5hjETBFHolkNfm8XDgpG9GnHNHnHO1gfsvAglmlhOylCISEW6aPhID/uuFDfosPQIFU+grgCIzKzCzRKAUeL7tADMbbGYWuD818Lo1oQ4rIt4aPiCFr142lhfW7eGpdzS1bqTxdTXAOddsZguAl4B44DHn3Hozmx9Y/whwHXC7mTUDx4FSp/98i8SkL1w4ircravj337/PuSMHMG5whteRJMC86t2SkhJXVlbmye8WkZ6prm1gzgNvkpHs4/d3XEBKYpf7hhIiZrbSOVfS0TqdKSoiZywnLYkf3TCJiuo67vndeq/jSIAKXUS65fzCHO64uJBnVlby3KpKr+MIKnQR6YE7Ly1ian4W33ruPSqqar2O0+ep0EWk23zxcTwwdxJJvjgWPLVKk3d5TIUuIj0yJLMf//3piby/5wjffXGD13H6NBW6iPTYpWcN4vMXFPD42zv403t7vI7TZ6nQRSQk7po9jgnDM/nnZ9ay68Axr+P0SSp0EQmJRF8cD809B+fgzsWraGpp9TpSn6NCF5GQGZGdwnevPZtVOw/x/Zc3ex2nz1Ghi0hIfXLCUOZOHcEjr2/ltU37vY7Tp6jQRSTkvn1FMWMHpfPVJWvYd6Te6zh9hgpdREIuOSGeh26czLHGFv5x8WpaWjVXX29QoYtIWBQNSuffrhrP2xU1LHy13Os4fYIKXUTC5tPnDufqSUP50V82s7xCl0gINxW6iISNmfEfnzqbkdmp3Ll4FQfqGr2OFNNU6CISVmlJPh6cO5mDdU187X/X6NJ1YaRCF5Gw+8iwTL71ibN4ZeN+fvbWNq/jxCwVuoj0iptnjOTy8YO4908bWbXzoNdxYlJQhW5ms81sk5mVm9ndpxk3xcxazOy60EUUkVhgZtx37UQGZSRz+xPvsv+ojk8PtS4L3czigYXAHKAYmGtmxZ2Muxf/xaRFRE6RmZLAozedy6HjjXzxiXdpbNZ8L6EUzB76VKDcOVfhnGsEFgNXdTDuDuA3gM71FZFOjR+a
yf3XTaRsx0G+/byuRxpKwRT6MGBXm8eVgWUnmNkw4FPAI6d7ITObZ2ZlZlZWVVV1pllFJEZcMXEot88azdPv7OTJ5Tu8jhMzgil062BZ++OOfgTc5Zw77fWnnHOLnHMlzrmS3NzcYDOKSAz62mVjmTU2l2//bj0rth/wOk5MCKbQK4G8No+HA7vbjSkBFpvZduA64GEzuzokCUUkJsXHGQ+UTiYvK4Xbn1jJ7kPHvY4U9YIp9BVAkZkVmFkiUAo833aAc67AOZfvnMsHngG+6Jz7bcjTikhMyeyXwE9uPpf6pla+8KuVush0D3VZ6M65ZmAB/qNXNgBLnHPrzWy+mc0Pd0ARiW2FA9P5wfUTWffBYb757DqdSdoDvmAGOedeBF5st6zDL0Cdc7f0PJaI9CWXjR/MVz46hh/+ZTPFQzO4beYoryNFJZ0pKiIR4Y5LCrl8/CD+68UNvLWl2us4UUmFLiIRIS7O+P71kygcmMaCp99lZ80xryNFHRW6iESMtCQfi24qobXVMe9XZdQ1NHsdKaqo0EUkouTnpPLgjeewed9Rvv6Mpts9Eyp0EYk4F43J5a7Z43hx3V4efm2r13GihgpdRCLSvAtHceXEofz3y5t4ZeM+r+NEBRW6iEQkM+PeaydQPCSDLz+9mq1VtV5HingqdBGJWP0S43n0pnNJ8MXxD78s40h9k9eRIpoKXUQi2vABKTz8mXPYUXOMryxeTWurviTtjApdRCLe9FHZ3PPJYv66cT8//Mtmr+NELBW6iESFm2eM5PqS4Tz4Srm+JO2ECl1EooKZ8Z2rP8Ko3FTu+9MmffTSARW6iESNJF88Cy4uZOPeo/xlg/bS21Ohi0hUuXLiUEZkpfDQq+U6i7QdFbqIRBVffBxfnDWatZWHeUOzMp5EhS4iUeeac4YzNDOZB/+6RXvpbajQRSTqJPrimD9rNGU7DrKsQheY/pAKXUSi0vUleeSmJ/HgK1u8jhIxVOgiEpWSE+KZN3MUS7fWsHKH9tIhyEI3s9lmtsnMys3s7g7WX2Vma81stZmVmdkFoY8qInKyz0wfwYCUBB58pdzrKBGhy0I3s3hgITAHKAbmmllxu2F/BSY65yYBtwI/DXVQEZH2UhJ93DZzFK9tqmJd5WGv43gumD30qUC5c67COdcILAauajvAOVfr/v5Vcyqgr51FpFfcPGMkGck+HnpVn6UHU+jDgF1tHlcGlp3EzD5lZhuBF/DvpZ/CzOYFPpIpq6qq6k5eEZGTpCcncMv5Bby0fh8b9x7xOo6ngil062DZKXvgzrnnnHPjgKuB73T0Qs65Rc65EudcSW5u7pklFRHpxK3n55OaGM/CV/v25eqCKfRKIK/N4+HA7s4GO+feAEabWU4Ps4mIBKV/SiI3zcjnD2t39+krGwVT6CuAIjMrMLNEoBR4vu0AMys0MwvcPwdIBGpCHVZEpDO3zSwgyRfHw314L73LQnfONQMLgJeADcAS59x6M5tvZvMDw64F3jOz1fiPiLnB6XxcEelFOWlJ3Dh1JL9d/QE7a455HccT5lXvlpSUuLKyMk9+t4jEpr2H67nwvle59tzhfPeas72OExZmttI5V9LROp0pKiIxY3BmMtdPGc4zK3ex+9Bxr+P0OhW6iMSU+ReNxjlY9EaF11F6nQpdRGLK8AEpXHPOMJ5+Zyf7j9Z7HadXqdBFJOZ8cVYhTS2t/PTNbV5H6VUqdBGJOfk5qVw5cShPLNvBgbpGr+P0GhW6iMSkL11cyLHGFh57q+/spavQRSQmFQ1KZ85HBvP40u0cPt7kdZxeoUIXkZi14JJCjjY08/jS7V5H6RUqdBGJWeOHZnLpuIE89rdt1DY0ex0n7FToIhLTFlxSyKFjTTy5bIfXUcJOhS4iMW3yiAHMLMrhJ29WcLyxxes4YaVCF5GYd8clRVTXNrJ4xU6vo4SVCl1EYt7UgiymFmTx6OsVNDTH7l66Cl1E+oQ7Lyli75F6nllZ6XWUsFGh
i0ifcH5hNpPy+vM/r22lqaXV6zhh4fM6gIhIbzAz7rikkM8/Xsal33+dJJ93+7M3TMnjtpmjQv66KnQR6TMuGTeQL1w4il0Hvb2iUU5aUlheV4UuIn2GmfGNj5/ldYywCeo9h5nNNrNNZlZuZnd3sP4zZrY2cFtqZhNDH1VERE6ny0I3s3j8F36eAxQDc82suN2wbcBFzrkJwHeARaEOKiIipxfMHvpUoNw5V+GcawQWA1e1HeCcW+qcOxh4uAwYHtqYIiLSlWAKfRiwq83jysCyznwe+GNHK8xsnpmVmVlZVVVV8ClFRKRLwRS6dbDMdTjQ7GL8hX5XR+udc4uccyXOuZLc3NzgU4qISJeCOcqlEshr83g4sLv9IDObAPwUmOOcqwlNPBERCVYwe+grgCIzKzCzRKAUeL7tADMbATwL3OSc2xz6mCIi0pUu99Cdc81mtgB4CYgHHnPOrTez+YH1jwD3ANnAw2YG0OycKwlfbBERac+c6/Dj8PD/YrMqoLszzucA1SGME27KGz7RlBWiK280ZYXoytuTrCOdcx1+CelZofeEmZVF0zsA5Q2faMoK0ZU3mrJCdOUNV1bNtigiEiNU6CIiMSJaCz3aphZQ3vCJpqwQXXmjKStEV96wZI3Kz9BFRORU0bqHLiIi7ajQRURiREQUehDzrZuZ/Tiwfq2ZndPVc80sy8z+bGZbAj8HRHDW+81sY2D8c2bWPxRZw5W3zfqvmZkzs5xIzmpmdwTWrTez+0KRNVx5zWySmS0zs9WBieymRkDWx8xsv5m91+45YdnGwpg3UrezDvO2WR/8duac8/SG/+zTrcAoIBFYAxS3G/Nx/DM4GjAdWN7Vc4H7gLsD9+8G7o3grJcBvsD9e0ORNZx5A+vz8J89vAPIidSswMXAX4CkwOOBkfy3BV7GPx/Sh89/zcusgXUXAucA77V7Tsi3sTDnjbjt7HR5u7OdRcIeepfzrQce/9L5LQP6m9mQLp57FfB44P7jwNWRmtU597Jzrjnw/FDOJx+uvy3AD4F/ppOZNyMo6+3A95xzDQDOuf0RntcBGYH7mXQwEV4vZ8U59wZwoIPXDcc2Fra8Ebqdne7vC2e4nUVCoQcz33pnY0733EHOuT0AgZ8DI2pbqHwAAAIVSURBVDhrW7fSyXzy3RCWvGZ2JfCBc25NiHKGLSswBphpZsvN7HUzmxLhef8RuN/MdgH/DXzD46ynE45tLNgs3cnbVqRsZ53qznYWCReJDma+9c7GBD1Xe4iENauZfQtoBp7sVrpThTyvmaUA38L/9jWUwvW39QED8L/NnQIsMbNRLvB+tgfClfd24CvOud+Y2fXAz4CPdjvl6XOc6ZjeEta8EbaddfyC3dzOImEPPZj51jsbc7rn7vvwLU3gZyjeaocrK2b2WeCTwGdCUDbhzDsaKADWmNn2wPJ3zWxwBGb98DnPBt7qvgO04p8YqafClfez+KeiBvhf/G/nvcx6OuHYxoLN0p28kbiddaZ721kovhToyQ3/HlRFIPyHXyiMbzfmE5z8hcI7XT0XuJ+Tv7C5L4KzzgbeB3Kj4W/b7vnbCc2XouH6284H/j1wfwz+t70WwXk3ALMC9y8FVnqZtc36fE79kjHk21iY80bcdna6vN3ZzkL2j+rhH+TjwGb83xR/K7BsPjA/cN+AhYH164CS0z03sDwb+CuwJfAzK4KzluMvmtWB2yOR/Lftzv/RPPzbJgJPAO8B7wKXRPLfFrgAWIm/FJYD50ZA1qeBPUAT/j3Nz4dzGwtj3kjdzjrM253tTKf+i4jEiEj4DF1EREJAhS4iEiNU6CIiMUKFLiISI1ToIiIxQoUuIhIjVOgiIjHi/wDLPKIuKKxnBgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Fit the forest once and reuse its importances. The original cell fitted it\n",
    "# twice on the same data and threw the first result away; rfc is seeded\n",
    "# (random_state=0 where it is constructed), so a second fit only reproduces\n",
    "# the same values.\n",
    "importances = rfc.fit(x, y).feature_importances_\n",
    "\n",
    "# 20 evenly spaced candidate thresholds, from 0 up to the largest importance.\n",
    "threshold = np.linspace(0, importances.max(), 20)\n",
    "score = []\n",
    "\n",
    "for i in threshold:\n",
    "    # Keep the features whose importance reaches this threshold, then score the\n",
    "    # forest on that reduced matrix with 5-fold cross-validation.\n",
    "    x_embedded = SelectFromModel(rfc, threshold=i).fit_transform(x, y)\n",
    "    once = cross_val_score(rfc, x_embedded, y, cv=5).mean()\n",
    "    score.append(once)\n",
    "\n",
    "plt.plot(threshold, score)\n",
    "plt.xlabel('importance threshold')\n",
    "plt.ylabel('mean 5-fold CV score')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42000, 175)"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-run the selection with a lower importance cut-off (0.002) and inspect the\n",
    "# shape of the reduced matrix.\n",
    "x_embedded = SelectFromModel(estimator=rfc, threshold=0.002).fit_transform(X=x, y=y)\n",
    "x_embedded.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9314761904761906"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean 5-fold cross-validation score of the forest on the selected columns.\n",
    "cross_val_score(estimator=rfc, X=x_embedded, y=y, cv=5).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 包装法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import RFE\n",
    "\n",
    "# Fresh seeded forest used as the estimator inside recursive feature elimination.\n",
    "rfc = RandomForestClassifier(random_state=0, n_estimators=10)\n",
    "# Eliminate 50 features per round until 340 remain, then keep the fitted selector.\n",
    "selector = RFE(\n",
    "    estimator=rfc,\n",
    "    n_features_to_select=340,\n",
    "    step=50,\n",
    ").fit(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False,  True,  True,  True,  True,  True,\n",
       "        True, False,  True, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True, False, False, False, False, False,\n",
       "       False, False, False, False, False,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True, False, False, False, False,\n",
       "       False, False, False, False, False, False,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True, False, False, False,\n",
       "       False, False, False, False, False, False, False, False,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True, False, False,\n",
       "       False, False, False, False, False, False, False, False,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True, False, False, False, False, False, False, False, False,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True,  True,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True,  True,  True, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False,  True,  True,  True,  True,  True,  True,\n",
       "        True,  True,  True,  True,  True,  True, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False,  True,  True,  True,  True,  True,  True,  True,\n",
       "        True, False, False,  True, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "        True,  True, False, False, False,  True, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False, False, False, False,\n",
       "       False, False, False, False, False, False])"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selector.support_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9, 8, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 6, 6,\n",
       "       7, 7, 7, 7, 7, 7, 6, 7, 7, 6, 5, 4, 4, 6, 3, 3, 5, 6, 5, 5, 4, 6,\n",
       "       6, 6, 7, 7, 7, 7, 6, 7, 6, 5, 4, 4, 4, 3, 1, 1, 1, 1, 1, 1, 3, 1,\n",
       "       2, 4, 6, 5, 6, 7, 7, 8, 8, 7, 8, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 3, 3, 5, 4, 5, 8, 8, 8, 8, 6, 5, 4, 3, 2, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 5, 9, 9, 9, 9, 6, 5, 5, 3, 2,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 6, 8, 9, 9,\n",
       "       9, 5, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       5, 9, 9, 9, 9, 7, 5, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 4, 6, 9, 9, 9, 7, 6, 4, 3, 2, 2, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 5, 9, 9, 9, 8, 6, 4, 4, 2,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 9, 9, 9,\n",
       "       9, 9, 9, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       3, 5, 5, 6, 8, 9, 9, 9, 5, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 2, 4, 6, 6, 9, 9, 9, 9, 6, 4, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 5, 9, 9, 9, 9, 5, 5, 3,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 6, 7, 9,\n",
       "       9, 7, 5, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3,\n",
       "       4, 5, 9, 9, 9, 9, 5, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 3, 4, 5, 6, 9, 9, 9, 6, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 5, 7, 8, 9, 8, 8, 5, 4, 3, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 5, 6, 7, 8, 6,\n",
       "       5, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3,\n",
       "       5, 6, 8, 8, 7, 5, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 3, 5, 5, 7, 8, 8, 8, 8, 6, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 5, 7, 7, 8, 8, 8, 6, 6, 5, 3, 3, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 5, 6, 6, 8, 8, 8, 8,\n",
       "       6, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 6, 5, 6, 7,\n",
       "       7, 7, 7, 6, 5, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 4, 3, 4, 5,\n",
       "       6, 8, 7, 8, 8, 8, 5, 4, 4, 4, 1, 1, 3, 3, 2, 1, 3, 3, 3, 3, 4, 4,\n",
       "       5, 5, 6, 8, 9, 9, 9, 6, 6, 9, 6, 6, 5, 4, 4, 4, 4, 4, 5, 4, 4, 5,\n",
       "       5, 5, 6, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 7, 8, 8, 6, 8, 7, 8, 8, 8,\n",
       "       8, 8, 9, 7])"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selector.ranking_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reduce x to the columns the fitted RFE selector marked as supported.\n",
    "x_wrapper = selector.transform(X=x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9385476190476192"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean 5-fold cross-validation score of the forest on the RFE-selected columns.\n",
    "cross_val_score(estimator=rfc, X=x_wrapper, y=y, cv=5).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
